View Javadoc

1   
2   /*
3    * SmartCrawler
4    *
5    * $Id: LinkFilter.java,v 1.4 2005/08/05 15:55:53 vincool Exp $
6    * Copyright 2005 Davide Pozza
7    *
8    * This program is free software; you can redistribute it
9    * and/or modify it under the terms of the GNU General Public
10   * License as published by the Free Software Foundation;
11   * either version 2 of the License, or (at your option) any
12   * later version.
13   *
14   * This program is distributed in the hope that it will be
15   * useful, but WITHOUT ANY WARRANTY; without even the implied
16   * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
17   * PURPOSE. See the GNU General Public License for more
18   * details.
19   *
20   * You should have received a copy of the GNU General Public
21   * License along with this program; if not, write to the Free
22   * Software Foundation, Inc., 59 Temple Place, Suite 330,
23   * Boston, MA 02111-1307 USA
24   *
25   */
26  
27  package org.smartcrawler.filter;
28  import org.apache.commons.lang.StringUtils;
29  import org.apache.log4j.Logger;
30  import org.smartcrawler.common.AbstractParametrizableComponent;
31  import org.smartcrawler.common.Context;
32  import org.smartcrawler.common.Link;
33  import org.smartcrawler.common.SCLogger;
34  
35  
36  /***
37   *
38   *
39   * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
40   * @version <tt>$Revision: 1.4 $</tt>
41   */
42  public class LinkFilter extends AbstractParametrizableComponent implements PrecFilterLink {
43  
44      private static Logger log = SCLogger.getLogger(LinkFilter.class);
45  
46      /***
47       *
48       * @param link
49       * @return
50       */
51  /*    public boolean isPermitted(Context conf, Link link) {
52          log.debug("isPermitted() BEGIN");
53          String linksStr = getParameter("links");
54          StringTokenizer st = new StringTokenizer(linksStr);
55          String[] links = new String[st.countTokens()];
56          int i = 0;
57          boolean res = false;
58          while (st.hasMoreTokens()) {
59              links[i] = st.nextToken();
60              if (link.toString().indexOf(links[i]) >= 0) {
61                  res = true;
62                  log.debug("Checking link: " + link.toString()
63                  + " VS " + links[i] + " res="+res);
64                  break;
65              } else {
66                  log.debug("Checking link: " + link.toString()
67                  + " VS " + links[i] + " res="+res);
68              }
69          }
70          log.debug("isPermitted() END");
71          return res;
72      }*/
73      public boolean isPermitted(Context conf, Link link) {
74          log.debug("isPermitted() BEGIN");
75          boolean res = false;
76          try {
77              String[] items = getParameters("links");
78              log.debug("isPermitted() items.length=" + items.length);
79  
80              for (int i = 0; i < items.length; i++) {
81                  items[i] = StringUtils.replace(items[i], ".", "//.");
82                  items[i] = StringUtils.replace(items[i], "*", ".*");
83                  if (link.toString().matches(items[i])) {
84                      res = true;
85                      log.debug("Checking link: " + link.toString()
86                      + " VS " + items[i] + " res="+res);
87                      break;
88                  } else {
89                      log.debug("Checking link: " + link.toString()
90                      + " VS " + items[i] + " res="+res);
91                  }
92              }
93          }catch (Exception e) {
94              log.warn("Filter error", e);
95          }
96          log.debug("isPermitted() END");
97          return res;
98      }
99  }